OHI British Columbia | OHI Science | Citation policy
knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'Figs/',
echo = TRUE, message = FALSE, warning = FALSE)
library(raster)
library(data.table)
source('~/github/src/R/common.R') ###
### includes library(tidyverse); library(stringr); dir_M points to ohi directory
dir_git <- '~/github/spp_health_dists'
dir_am_data <- file.path(dir_M, 'git-annex/globalprep/_raw_data/aquamaps/d2017')
### goal specific folders and info
dir_setup <- file.path(dir_git, 'data_setup')
dir_data <- file.path(dir_git, 'data')
dir_o_anx <- file.path(dir_O, 'git-annex/spp_health_dists')
# ### provenance tracking
# library(provRmd); prov_setup()
### support scripts
source('~/github/src/R/rast_tools.R')
### raster plotting and analyzing scriptsCreate a map of the distribution of biodiversity - all species in AquaMaps assessed by IUCN (using AquaMaps version of IUCN ID numbers). These maps will be generated using all taxonomic groups:
The following maps will also be generated for taxonomic groups (IUCN-assessed species only): * Mean risk * Variance of risk * Number of species
Species richness as the raw count of species indicated to exist in a particular cell. This is calculated for all species included in the AquaMaps dataset, regardless of IUCN assessment status.
We can consider species presence in several ways:
spp_richness_file <- file.path(dir_data, 'am_all_spp_richness.csv')
if(!file.exists(spp_richness_file)) {
am_spp_cells <- fread(file.path(dir_o_anx, 'am_files/am_spp_cells_all.csv'))
spp_richness_0pct_probwt <- am_spp_cells %>%
group_by(loiczid) %>%
summarize(n_spp_0pct = n(),
n_spp_probwt = sum(prob))
spp_richness_60pct <- am_spp_cells %>%
filter(prob >= .60) %>%
group_by(loiczid) %>%
summarize(n_spp_60pct = n())
spp_richness <- spp_richness_0pct_probwt %>%
full_join(spp_richness_60pct, by = 'loiczid')
write_csv(spp_richness, spp_richness_file)
}spp_richness_file <- file.path(dir_data, 'am_all_spp_richness.csv')
spp_richness <- read_csv(spp_richness_file, col_types = 'dddd') %>%
mutate(log_n_probwt = log(n_spp_probwt))
loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))
rich_rast_0pct <- subs(loiczid_rast, spp_richness, by = 'loiczid', which = 'n_spp_0pct')
rich_rast_60pct <- subs(loiczid_rast, spp_richness, by = 'loiczid', which = 'n_spp_60pct')
rich_rast_probwt <- subs(loiczid_rast, spp_richness, by = 'loiczid', which = 'n_spp_probwt')
rich_rast_log_probwt <- subs(loiczid_rast, spp_richness, by = 'loiczid', which = 'log_n_probwt')
plot(rich_rast_0pct, main = 'Richness, All species, any presence')plot(rich_rast_60pct, main = 'Richness, All species, prob of occur >= 60%')plot(rich_rast_probwt, main = 'Richness, All species, weighted by prob of occur')plot(rich_rast_log_probwt, main = 'Richness, All species, log(prob of occur weighted))')Species range rarity is a measure of endemism that weighs the presence of a species in a particular area relative to its entire range. It is defined in Selig et al 2014 (and referenced from Williams et al. 1996: Promise and problems in applying quantitative complementary areas for representing the diversity of some neotropical plants)
\[Range \hspace{3pt} rarity_{cell} = \sum_{i=1}^N \frac{1}{A_i} \times w\]
Relative range rarity is range rarity divided by species richness to remove confounding effects (again as defined in Selig et al 2014).
As in Selig et al 2014, we will multiply rarity by 100,000 and relative rarity by 1000 for analytical convenience.
This is calculated for all species included in the AquaMaps dataset, regardless of IUCN assessment status.
We will calculate range rarity using the same three cuts as for richness; for each cut, species presence, total range, and richness will all be calculated using the same cut method.
spp_rarity_file <- file.path(dir_data, 'am_all_spp_rarity.csv')
if(!file.exists(spp_rarity_file)) {
if(!exists('am_spp_cells')) {
am_spp_cells <- fread(file.path(dir_o_anx, 'am_files/am_spp_cells_all.csv'))
}
am_spp_ranges <- read_csv(file.path(dir_data, 'am_spp_range_summary.csv'))
spp_cells_ranges <- am_spp_cells %>%
left_join(am_spp_ranges, by = 'am_sid')
spp_rarity_0pct_probwt <- spp_cells_ranges %>%
group_by(loiczid) %>%
summarize(rarity_0pct = sum( 1 / area_km2_0pct),
rarity_probwt = sum(prob / area_km2_probwt))
spp_rarity_60pct <- spp_cells_ranges %>%
filter(prob >= .60) %>%
group_by(loiczid) %>%
summarize(rarity_60pct = sum( 1 / area_km2_60pct, na.rm = TRUE))
spp_rarity <- spp_rarity_0pct_probwt %>%
full_join(spp_rarity_60pct, by = 'loiczid') %>%
mutate(rarity_0pct = rarity_0pct * 100000,
rarity_60pct = rarity_60pct * 100000,
rarity_probwt = rarity_probwt * 100000)
write_csv(spp_rarity, spp_rarity_file)
}spp_rarity <- read_csv(file.path(dir_data, 'am_all_spp_rarity.csv'),
col_types = 'dddd')
spp_richness <- read_csv(file.path(dir_data, 'am_all_spp_richness.csv'),
col_types = 'dddd')
spp_rel_rare <- spp_rarity %>%
left_join(spp_richness, by = 'loiczid') %>%
mutate(rel_rare_0pct = rarity_0pct / n_spp_0pct * 1000,
rel_rare_60pct = rarity_60pct / n_spp_60pct * 1000,
rel_rare_probwt = rarity_probwt / n_spp_probwt * 1000) %>%
select(loiczid, contains('rel_rare'))
write_csv(spp_rel_rare, file.path(dir_data, 'am_all_spp_rel_rarity.csv'))spp_rarity <- read_csv(file.path(dir_data, 'am_all_spp_rarity.csv'),
col_types = 'dddd')
spp_rel_rare <- read_csv(file.path(dir_data, 'am_all_spp_rel_rarity.csv'),
col_types = 'dddd')
loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))
rare_rast_0pct <- subs(loiczid_rast, spp_rarity,
by = 'loiczid', which = 'rarity_0pct')
rare_rast_60pct <- subs(loiczid_rast, spp_rarity,
by = 'loiczid', which = 'rarity_60pct')
rare_rast_probwt <- subs(loiczid_rast, spp_rarity,
by = 'loiczid', which = 'rarity_probwt')
plot(rare_rast_0pct, main = 'Range rarity, All species, any presence')plot(rare_rast_60pct, main = 'Range rarity, All species, prob of occur >= 60%')plot(rare_rast_probwt, main = 'Range rarity, All species, weighted by prob of occur')### relative range rarity
rel_rare_rast_0pct <- subs(loiczid_rast, spp_rel_rare,
by = 'loiczid', which = 'rel_rare_0pct')
rel_rare_rast_60pct <- subs(loiczid_rast, spp_rel_rare,
by = 'loiczid', which = 'rel_rare_60pct')
rel_rare_rast_probwt <- subs(loiczid_rast, spp_rel_rare,
by = 'loiczid', which = 'rel_rare_probwt')
plot(rel_rare_rast_0pct, main = 'Rel range rarity, All spp, any presence')plot(rel_rare_rast_60pct, main = 'Rel range rarity, All spp, prob of occur >= 60%')plot(rel_rare_rast_probwt, main = 'Rel range rarity, All spp, weighted by prob of occur')As in the species richness and rarity calculations, we will calculate mean risk using three different considerations of species presence: all presence (non-zero probability of occurrence), “core habitat” (60% or greater prob of occurrence), and probability-weighted.
This binds rows into a long dataframe, but the saved file is rather large. By rounding the decimals a bit, we can shave off quite a bit of file size.
iucn_version <- '2017-3'
cell_risk_file <- file.path(dir_data, sprintf('risk_by_cell_all_%s.csv', iucn_version))
iucn_to_am_lookup <- read_csv(file.path(dir_setup, 'int',
'am_iucn_crosslisted_spp.csv')) %>%
select(am_sid = speciesid, iucn_sid = iucn_id) %>%
distinct()
spp_current_risk <- read_csv(file.path(dir_data,
sprintf('iucn_risk_current_%s.csv', iucn_version))) %>%
select(iucn_sid, cat, cat_score) %>%
left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
as.data.table()
spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells_assessed.csv'))##
Read 43.3% of 16116125 rows
Read 79.4% of 16116125 rows
Read 16116125 rows and 3 (of 3) columns from 0.338 GB file in 00:00:04
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
### using data.table join:
spp_cells_risk <- spp_current_risk[spp_cells, on = 'am_sid'] %>%
filter(!is.na(cat_score) & !is.na(iucn_sid))
spp_cells_risk_sum_0pct <- spp_cells_risk %>%
filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
# filter(prob >= 0.60) %>%
group_by(loiczid) %>%
summarize(n_spp = n(),
mean_risk = mean(cat_score),
var_risk = var(cat_score),
presence = '0%')
spp_cells_risk_sum_60pct <- spp_cells_risk %>%
filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
filter(prob >= 0.60) %>%
group_by(loiczid) %>%
summarize(n_spp = n(),
mean_risk = mean(cat_score),
var_risk = var(cat_score),
presence = '60%')
### For the prob weighted calculations, weight the mean and var:
### mean = 1/n * sum(y)
### ==> mean = 1/sum(prob) * sum(y * prob)
### var = 1/n * sum[y - E(y)]^2
### ==> var = 1/sum(prob) * sum([y - E(y)]^2 * prob)
spp_cells_risk_sum_probwt <- spp_cells_risk %>%
filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
# filter(prob >= 0.60) %>%
group_by(loiczid) %>%
summarize(n_spp = sum(prob),
mean_risk = 1/n_spp * sum(cat_score * prob),
var_risk = 1/n_spp * sum((cat_score - mean_risk)^2 * prob),
presence = 'prob')
spp_cells_risk_sum <- bind_rows(spp_cells_risk_sum_0pct,
spp_cells_risk_sum_60pct,
spp_cells_risk_sum_probwt) %>%
mutate(var_risk = ifelse(mean_risk == 0 & is.na(var_risk), 0, var_risk),
mean_risk = round(mean_risk, 6),
var_risk = round(var_risk, 6))
### if mean risk == 0, all risks are zero, so zero variance.
write_csv(spp_cells_risk_sum, cell_risk_file)loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))
spp_cells_risk_sum <- read_csv(cell_risk_file,
col_types = 'idddc')
cuts <- c('0%', '60%', 'prob')
for(cut in cuts) { ### cut <- cuts[3]
df_cut <- spp_cells_risk_sum %>%
filter(presence == cut)
cat(sprintf('#### Mean, variance, and richness based on %s', cut))
risk_rast <- subs(loiczid_rast, df_cut, by = 'loiczid', which = 'mean_risk')
var_rast <- subs(loiczid_rast, df_cut, by = 'loiczid', which = 'var_risk')
nspp_rast <- subs(loiczid_rast, df_cut, by = 'loiczid', which = 'n_spp')
plot(risk_rast, main = sprintf('Mean risk, cut = %s', cut))
plot(var_rast, main = sprintf('Variance of risk, cut = %s', cut))
plot(nspp_rast, main = sprintf('Assessed richness, cut = %s', cut))
}Again we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or trend_score.
cell_trend_file <- file.path(dir_data, 'spp_trend_by_loiczid.csv')
iucn_to_am_lookup <- read_csv(file.path(dir_data, 'am_iucn_crosslisted_spp.csv')) %>%
select(am_sid = speciesid, iucn_sid = iucn_id) %>%
distinct()
spp_trend <- read_csv(file.path(dir_data, 'trend_by_spp.csv')) %>%
left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
as.data.table()
if(!exists('spp_cells')) {
spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'))
}
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
### using data.table join:
spp_cells_trend <- spp_trend[spp_cells, on = 'am_sid']
spp_cells_trend_sum <- spp_cells_trend %>%
filter(!is.na(trend_score) & !is.na(iucn_sid)) %>%
filter(prob >= 0.60) %>%
group_by(loiczid) %>%
summarize(n_spp = n(), mean_trend = mean(trend_score))
write_csv(spp_cells_trend_sum, cell_trend_file)loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))
### gotta force the mean_trend column to be double; there are lots of zero
### values so will default to thinking that column is integer.
trend_by_cell <- read_csv(file.path(dir_data, 'spp_trend_by_loiczid.csv'),
col_types = 'iid')
trend_rast <- subs(loiczid_rast, trend_by_cell, by = 'loiczid', which = 'mean_trend')
nspp_tr_rast <- subs(loiczid_rast, trend_by_cell, by = 'loiczid', which = 'n_spp')
plot(trend_rast)
plot(nspp_tr_rast)
trend_clipped <- trend_rast
values(trend_clipped)[values(nspp_tr_rast) < 5] <- NA
plot(trend_clipped)Here we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or cat_score.
cell_risk_by_range_file <- c(file.path(dir_data, 'risk_by_cell_by_range.csv'))
iucn_to_am_lookup <- read_csv(file.path(dir_data, 'am_iucn_crosslisted_spp.csv')) %>%
select(am_sid = speciesid, iucn_sid = iucn_id) %>%
distinct()
spp_ranges <- read_csv(file.path(dir_data, 'am_spp_range_summary.csv')) %>%
arrange(area_km2) %>%
mutate(cum_area = cumsum(area_km2),
range_gp = case_when(cum_area <= max(cum_area) * .33 ~ 'small',
cum_area <= max(cum_area) * .67 ~ 'medium',
TRUE ~ 'large')) %>%
select(am_sid, iucn_sid, area_km2, range_gp)
# summary(spp_ranges %>% filter(range_gp == 'small') %>% .$area_km2)
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 2522 400481 1180425 2192586 3739745 7681908
# summary(spp_ranges %>% filter(range_gp == 'medium') %>% .$area_km2)
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 7690359 8286343 9184440 15440048 12563147 103892977
# summary(spp_ranges %>% filter(range_gp == 'large') %>% .$area_km2)
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 105087230 136623452 159164037 167382766 196088390 245559322
spp_current_risk <- read_csv(file.path(dir_data, 'iucn_spp_cat_current.csv')) %>%
select(iucn_sid, cat, cat_score) %>%
left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
left_join(spp_ranges) %>%
as.data.table()
if(!exists('spp_cells')) {
spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'))
}
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
### using data.table join:
spp_cells_risk <- spp_current_risk[spp_cells, on = 'am_sid']
spp_cells_risk_sum <- spp_cells_risk %>%
filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
filter(prob >= 0.60) %>%
group_by(loiczid, range_gp) %>%
summarize(n_spp = n(),
mean_risk = mean(cat_score))
write_csv(spp_cells_risk_sum, cell_risk_by_range_file)loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))
### gotta force the mean_risk column to be double; there are lots of zero
### values so will default to thinking that column is integer.
risk_by_cell_by_range <- read_csv(file.path(dir_data, 'risk_by_cell_by_range.csv'),
col_types = 'dcdd')
for(range_size in c('small', 'medium', 'large')) {
### range_size <- 'medium'
tmp <- risk_by_cell_by_range %>%
filter(range_gp == range_size) %>%
filter(n_spp >= 5)
risk_rast <- subs(loiczid_rast, tmp,
by = 'loiczid', which = 'mean_risk')
nspp_rast <- subs(loiczid_rast, tmp,
by = 'loiczid', which = 'n_spp')
plot(risk_rast, main = paste0('Mean risk for spp w/range ', range_size, ' (n >= 5)'))
plot(nspp_rast, main = paste0('Spp count for spp w/range ', range_size, ' (n >= 5)'))
}Here we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or cat_score.
cell_risk_by_taxa_file <- c(file.path(dir_data, 'risk_by_cell_by_taxa.csv'))
iucn_to_am_lookup <- read_csv(file.path(dir_data, 'am_iucn_crosslisted_spp.csv')) %>%
select(am_sid = speciesid, iucn_sid = iucn_id) %>%
distinct()
spp_taxa <- read_csv(file.path(dir_data, 'am_spp_taxa.csv')) %>%
select(am_sid, iucn_sid, taxa) %>%
distinct()
spp_current_risk <- read_csv(file.path(dir_data, 'iucn_spp_cat_current.csv')) %>%
select(iucn_sid, cat, cat_score) %>%
left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
left_join(spp_taxa) %>%
as.data.table()
if(!exists('spp_cells')) {
spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'))
}
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
### using data.table join:
spp_cells_risk <- spp_current_risk[spp_cells, on = 'am_sid']
spp_cells_risk_sum <- spp_cells_risk %>%
filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
filter(prob >= 0.60) %>%
group_by(loiczid, taxa) %>%
summarize(n_spp = n(),
mean_risk = mean(cat_score))
write_csv(spp_cells_risk_sum, cell_risk_by_taxa_file)loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))
### gotta force the mean_risk column to be double; there are lots of zero
### values so will default to thinking that column is integer.
risk_by_cell_by_taxa <- read_csv(file.path(dir_data, 'risk_by_cell_by_taxa.csv'),
col_types = 'dcdd')
taxa_list <- risk_by_cell_by_taxa$taxa %>% unique()
for(taxa_gp in taxa_list) {
### taxa_gp <- 'mammal'
tmp <- risk_by_cell_by_taxa %>%
filter(taxa == taxa_gp) %>%
filter(n_spp >= 0)
risk_rast <- subs(loiczid_rast, tmp,
by = 'loiczid', which = 'mean_risk')
nspp_rast <- subs(loiczid_rast, tmp,
by = 'loiczid', which = 'n_spp')
plot(risk_rast, main = paste0('Mean risk for spp in ', taxa_gp, ' (n >= 0)'))
plot(nspp_rast, main = paste0('Spp count for spp in ', taxa_gp, ' (n >= 0)'))
}# prov_wrapup(commit_outputs = FALSE)